#pragma once
#include "SplitTool.h"
#include "Configuration.h"

/// Web page class: represents a single web-page document.
///
/// Only the declarations are visible in this header; the member functions
/// (other than operator<) are defined elsewhere, so the notes below describe
/// the declared interface and hedge anything about behavior.
class WebPage{
public:
    /// Builds a page from a raw document string, given a configuration and a
    /// word-splitting tool (how they are used is defined out of line).
    WebPage(string &doc, Configuration *config, SplitTool &splitTool);
    /// Builds a page from a raw document string only.
    WebPage(string &doc);
    /// Default constructor.
    WebPage();
    ~WebPage();

    /// Returns the document id (presumably _docid -- confirm in the definition).
    int getDocId();
    /// Returns the document text (presumably _doc -- confirm in the definition).
    string getDoc();
    /// Returns the document body (presumably _content -- confirm in the definition).
    string getDocContent();
    /// Returns a mutable reference to a word -> count map
    /// (presumably _wordsMap -- confirm in the definition).
    map<string, int> &getWordsMap();
    /// Takes an output list and a count k; presumably fills topKList with the
    /// k most frequent words -- confirm in the definition.
    void getTopK(vector<string> &topKList, int k);

    /// Orders pages by ascending document id.
    friend bool operator<(const WebPage &left, const WebPage &right){
        // "left < right" expressed from the right-hand side; same result for ints.
        return right._docid > left._docid;
    }

private:
    // NOTE(review): keep the declaration order of the data members below;
    // it determines member initialization order.
    string _doc;     // page content read from a file and to be written to a file
    int _docid;      // document id
    string _title;   // title
    string _url;     // link (URL)
    string _content; // body text
    /* string _summary; // summary */
    vector<string> _topWords;   // the 20 most frequent words
    map<string, int> _wordsMap; // frequency of every word in the document
};

